
global output ""
global data ""
cd $output

clear 

************************* 
** Data Appendix Table **
************************* 

use	  "$data/IL_officer_data_anon", clear
preserve 

* age_start
generate Bdate=date(dateofbirth,"MDY") 
format Bdate %d
by ptbid, sort: egen M_start = min(start_date)
gen age_start = (M_start - Bdate)/365
replace age_start = . if age_start<21
replace age_start = . if age_start>60

keep ptbid age_start sex race education
replace education = "High School or less" if education=="High School" | education == "Grade School"
replace education = "Bachelors Degree or more" if education=="Bachelors Degree" | education == "Doctorate" | education == "Masters Degree"

duplicates drop 

sum age_start
tab1 sex race education

restore 

********************************************
** Fig 1: length of stay within an agency **
********************************************

use	  "$data/IL_officer_data_anon", clear
sort ptbid start_date

*** Ignore changes within agency from PT to FT, as long as the new hire is w/in 6months in the same agency 
sort ptbid agency start_date
gen length_to_new = start_date[_n+1] - sep_date  if ptbid == ptbid[_n+1] & agency == agency[_n+1] & status != status[_n+1]
gen status_change = length_to_new !=. & length_to_new <= 180
replace sep_date = sep_date[_n+1] if status_change==1 
replace sepreason = sepreason[_n+1] if status_change==1 
drop if status_change[_n-1]==1 
drop status_change 


gen voluntary_sep = sepreason=="Resigned" | sepreason=="Left for new job" | sepreason=="Retired" 
gen involuntary_sep = sepreason=="Terminated"  | sepreason=="Charged/Convicted of Crime" 
gen other_sep = sep_date!=. & involuntary_sep==0 & voluntary_sep == 0
** Drop stints of 0 or 1 day 
gen length = sep_date -start_date 
drop if length < 2

sort ptbid start_date

** Drop new stint at the same agency. 
gen x = ptbid ==ptbid[_n-1] & agency == agency[_n-1]
drop if ptbid ==ptbid[_n-1] & agency == agency[_n-1]

gen leave_policing = sep_date!=. & ptbid !=ptbid[_n+1]
gen leave_other_job = sep_date!=. & ptbid ==ptbid[_n+1]

keep ptbid agency  start_date sep_date  voluntary_sep involuntary_sep leave_policing  leave_other_job

compress

duplicates drop 

gen length_position =  sep_date - start_date
replace length_position = . if length_position<0
gen length_pos = length_position
replace length_pos = d(15mar2021) - start_date if sep_date==.

** Year = leave that year 
forvalues i = 1(1)15{
local j = `i'-1
gen Year`i' = length_pos < 365*`i'& length_pos>=365*`j'
replace Year`i' = . if start_date + 365*`i' > d(15mar2021)
}

** Voluntarily separate that year 
forvalues i = 1(1)15{
local j = `i'-1
gen vol_Year`i' = length_pos < 365*`i'& length_pos>=365*`j' & voluntary_sep == 1

replace vol_Year`i' = . if start_date + 365*`i' > d(15mar2021)
}

** Involuntarily separate that year 
forvalues i = 1(1)15{
local j = `i'-1
gen invol_Year`i' = length_pos < 365*`i'& length_pos>=365*`j' & involuntary_sep == 1
replace invol_Year`i' = . if start_date + 365*`i' > d(15mar2021)
}

** Leaves voluntarily that year and doesn't work at a new agency in the future. 
forvalues i = 1(1)15{
local j = `i'-1
gen Quit_Year`i' = length_pos < 365*`i'& length_pos>=365*`j' & leave_policing == 1 & voluntary_sep == 1
replace Quit_Year`i' = . if start_date + 365*`i' > d(15mar2021)
}

** Leaves voluntarily that year, and works at another agency. 
forvalues i = 1(1)15{
local j = `i'-1
gen Change_Year`i' = length_pos < 365*`i'& length_pos>=365*`j' & leave_other_job == 1 & voluntary_sep == 1
replace Change_Year`i' = . if start_date + 365*`i' > 22366
}


** NB: Year is not equal to vol + invol because some people 

collapse (mean) Year* vol_Year* invol_Year* Quit_Year* Change_Year*
gen i = _n
reshape long Year vol_Year invol_Year other_Year Quit_Year Change_Year, i(i) j(years)
label var Year "Number of exits that year"
label var vol_Year "Number of voluntary exits that year"
label var invol_Year "Number of involuntary exits that year"
label var Quit_Year "Number of people leaving IL policing that year"
label var Change_Year "Number of people changing policing jobs in IL that year"

twoway (line Year years)  (line vol_Year years) (line invol_Year years), /// 
ytitle(Fraction separating) xtitle(Years at agency)  plotregion(margin(zero)) /// 
 yscale(range(0 0.2)) ylabel(0(0.05)0.2) ymtick(0(0.2)0.05) /// 
  legend(label(1 "Separations (total)") label(2 "Voluntary separations") label(3 "Involuntary separations"))
graph export "FIG1_Yearly_separations.png", replace


twoway (line Change_Year years)  (line Quit_Year years) , ///
ytitle(Fraction separating) xtitle(Years at agency)  plotregion(margin(zero)) /// 
 yscale(range(0 0.1)) ylabel(0(0.05)0.1) ymtick(0(0.1)0.05) legend(label(1 "Move to new IL agency") label(2 "Cease IL policing"))
graph export "FIG2_Yearly_quit_change.png", replace


*********************************************************
** Tab 2: in the first 5 years: who stays, who leaves? **
*********************************************************

use	  "$data/IL_officer_data_anon", clear

sort ptbid start_date
*** Ignore changes within agency from PT to FT, as long as the new hire is w/in 6months in the same agency 
sort ptbid agency start_date
gen length_to_new = start_date[_n+1] - sep_date  if ptbid == ptbid[_n+1] & agency == agency[_n+1] & status != status[_n+1]
gen status_change = length_to_new !=. & length_to_new <= 180
replace sep_date = sep_date[_n+1] if status_change==1 
replace sepreason = sepreason[_n+1] if status_change==1 
drop if status_change[_n-1]==1 
drop status_change 

** Drop stints of 0 or 1 day 
gen length = sep_date -start_date 
drop if length < 2
** Drop new stint at the same agency. 
drop if ptbid ==ptbid[_n-1] & agency == agency[_n-1]

** Keep officers whom we observe for at least 5Y. 
keep if 22366 - start_date > 365*5
replace sep_date = . if sep_date - start_date > 365*5

gen voluntary_sep = (sepreason=="Resigned" | sepreason=="Left for new job" | sepreason=="Retired" |  sepreason=="Other") & sep_date !=.
gen involuntary_sep = (sepreason=="Terminated"  | sepreason=="Charged/Convicted of Crime")  & sep_date !=.
gen leave_policing = sep_date!=. & ptbid !=ptbid[_n+1]  & sep_date !=.
gen leave_other_job = sep_date!=. & ptbid ==ptbid[_n+1]  & sep_date !=.
gen still_work =   sep_date ==.



generate Bdate=date(dateofbirth,"MDY") 
format Bdate %d
gen age_start = (start_date - Bdate)/365
replace age_start = . if age_start<21
replace age_start = . if age_start>60

replace education = "High School or less" if education=="High School" | education == "Grade School"
replace education = "Bachelors Degree or more" if education=="Bachelors Degree" | education == "Doctorate" | education == "Masters Degree"


keep ptbid voluntary_sep involuntary_sep leave_policing leave_other_job still_work sex race education age_start
gen female = sex == "Female"
gen Black = race == "African American/Black"
gen White = race == "Caucasian/White"
gen Hispanic = race == "Hispanic"
gen Other_race = race == "Asian/Oriental" | race =="Native American"
label var Other_race "Other race"
gen assoc = education=="Associates Degree"
label var assoc "Associates Degree"
gen BA = education =="Bachelors Degree or more"
label var BA "Bachelors Degree or more"
gen HS = education =="High School or less"
label var HS "High School or less"
gen col = education =="Some College"
label var col "Some College"
gen Unknown = education =="Unknown"
label var Unknown "Unknown education"

label var voluntary_sep  "Left" 
label var involuntary_sep  "Fired" 
label var still_work  "Still working" 
label var leave_policing  "Left IL" 
label var leave_other_job  "New job in IL" 

estpost sum still_work voluntary_sep involuntary_sep leave_policing leave_other_job  
est store all
estpost sum still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if female==0
est store male
estpost sum still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if female==1
est store fem
estpost sum   still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if  Black  ==1
est store bl
estpost sum  still_work  voluntary_sep involuntary_sep leave_policing leave_other_job  if White ==1
est store wh
estpost sum  still_work  voluntary_sep involuntary_sep leave_policing leave_other_job  if Hispanic ==1
est store hisp
estpost sum  still_work  voluntary_sep involuntary_sep leave_policing leave_other_job  if assoc==1
est store asso
estpost sum   still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if BA==1
est store BA
estpost sum   still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if HS==1
est store HS
estpost sum   still_work voluntary_sep involuntary_sep leave_policing leave_other_job  if col==1
est store col


esttab all male fem wh bl hisp HS col asso BA using sociodemog.tex, replace ///
mtitles("All" "Male" "Female" "White" "Black" "Hispanic"  "HS" "College" "Assoc" "BA") ///
cells("mean(fmt(2))") label nonumber nodepvars  title("Sociodemographics and employment status, 5 years after start date at an agency" \label{sociodemog})         


esttab all male fem wh bl hisp HS col asso BA using sociodemog.rtf, replace ///
mtitles("All" "Male" "Female" "White" "Black" "Hispanic"  "HS" "College" "Assoc" "BA") ///
cells("mean(fmt(2))") label nonumber nodepvars  title("Table 2: Sociodemographics and employment status, 5 years after start date at an agency" \label{sociodemog})         


**********************************************************************
** Tab 1: Number of jobs people worked at, for those hired pre-2010 **
**********************************************************************

use	  "$data/IL_officer_data_anon", clear
by ptbid , sort: egen f_start = min(Y_start )
keep if f_start <= 2010

keep ptbid agency
duplicates drop 
duplicates tag ptbid, gen(nb_agencies)

label var nb_agencies "Number of agencies"
replace nb_agencies = nb_agencies + 1
replace nb_agencies = 4 if nb_agencies >4
tostring nb_agencies, replace 
replace nb_agencies = "4+" if nb_agencies=="4"
estpost tab nb_agencies
	est store a
esttab a using nb_agencies_FT.tex, nonumbers cells(pct(fmt(1))) sfmt(fmt(%12.2f))  /// 
replace label title("Percentage of officers that worked in 1, 2, 3 or 4+ agencies, among officers whose first Illinois policing job was in 2010 or earlier" \label{nbjobFT}) 

esttab a using nb_agencies_FT.rtf, nonumbers cells(pct(fmt(1))) sfmt(fmt(%12.2f))  /// 
replace label title("Table 1: Percentage of officers that worked in 1, 2, 3 or 4+ agencies, among officers whose first Illinois policing job was in 2010 or earlier" \label{nbjobFT}) 


**********************************************************
** Tab 3: Ranks when appointment vs. rank with promoted **
**********************************************************

 
use "$data/IL_rank_data_anon", clear

gen  Y = year(entry_date )
by ptbid, sort: egen first_start = min(Y_start) 
keep if Y_start  >=2000 


sort ptbid start_date
*** Ignore changes within agency from PT to FT, as long as the new hire is w/in 6months in the same agency 
sort ptbid agency start_date
gen length_to_new = start_date[_n+1] - sep_date  if ptbid == ptbid[_n+1] & agency == agency[_n+1] & status != status[_n+1]
gen status_change = length_to_new !=. & length_to_new <= 180
drop if status_change[_n-1]==1 
drop status_change 

** Drop stints of 0 or 1 day 
gen length = sep_date -start_date 
drop if length < 2

sort ptbid agency entry_date rank
replace notes = lower(notes)
replace rank = lower(rank) 

drop if formtype == "Separation"

replace rank = "sergeant" if strpos(notes, "promoted to sergeant") >0
gen rank_short = rank 

compress

* Source for grouping ranks: https://en.wikipedia.org/wiki/Police_ranks_of_the_United_States

replace rank_short = "police officer" if rank == "deputy" 
replace rank_short = "sergeant" if rank == "sergeant" | rank == "corporal"
replace rank_short = "detective" if rank == "detective" | rank == "investigator" /// 
| rank == "first sergeant" | rank == "special agent" | rank == "special agent in charge"
replace rank_short = "lieutenant" if rank == "lieutenant" | rank == "captain" | /// 
rank == "commander"  | rank == "major" | rank == "watch commander"
replace rank_short = "chief" if rank == "acting chief"   | rank == "superintendent" | rank == "sheriff" 
replace rank_short = "deputy chief" if rank == "chief deputy" | rank == "assistant chief" | rank == "deputy director" | rank =="under sheriff" 
replace rank_short = "other" if rank_short != "sergeant" & rank_short != "police officer" & rank_short != "deputy" ///
 & rank_short != "chief" & rank_short !="lieutenant"  & rank_short != "deputy chief" & rank_short != "detective"


drop if formtype  =="Status Change" & ptbid == ptbid[_n-1] & rank_short == rank_short[_n-1]

gen App = formtype =="Appointment" 
by ptbid , sort: egen Any_app = max(App)

drop if formtype  =="Status Change" & Any_app==0 & rank == "police officer"
drop if formtype  =="Status Change" & Any_app==0 & rank == "deputy"

gen one = 1

gen length_change = entry_date - start_date 
gen promotion = formtype == "Status Change" 

preserve 
by rank_short formtype, sort: egen NB = sum(one ) 
keep rank_short NB formtype
duplicates drop 
by rank_short, sort: egen sum = sum(NB)
gen pct = NB/sum*100
gen order = . 
replace order = 1 if rank_short == "police officer"
replace order = 2 if rank_short == "deputy"
replace order = 3 if rank_short == "detective"
replace order = 4 if rank_short == "sergeant"
replace order = 5 if rank_short == "lieutenant"
replace order = 6 if rank_short == "deputy chief"
replace order = 7 if rank_short == "chief"
replace order = 8 if rank_short == "Other"

gen i = 1
replace i = 2 if formtype =="Appointment"
drop formtype sum 
 reshape wide NB pct, i(rank_short ) j(i)

sort order  

gen N = NB1 + NB2
rename rank_short rank 
rename pct1 Promotion 
rename pct2 Appointment 
keep rank Promotion Appointment N
drop if rank == "other"

dataout, save(rank_nb)  tex  replace  dec(2) 
dataout, save(rank_nb)  word  replace  dec(2) 

restore 

********************************************
** Fig A1: Imputed vs. actual agency size **
********************************************

*** Data source on salaries & agency size for 2020: https://govsalaries.com/state/IL
** Keep places that have true info on number of people working there in 2020 -- we have this info for 250/889 agencies (=28%)

 use "$data/IL_pensions_anon", clear
 
 keep if EmployeesNumber !=. 

forvalues i = 2000(1)2021{
gen working_`i' =Y_start <= `i' & (Y_stop >= `i' | Y_stop == .)
gen quit_`i' = Y_stop == `i'
by agency, sort: egen nb_active`i' = sum(working_`i') 
by agency, sort: egen nb_quit`i' = sum(quit_`i') 
}

keep agency nb_active* nb_quit* Population EmployeesNumber 
 duplicates drop 
 
 
 label var nb_active2020 "imputed in 2020"
 label var nb_active2015 "imputed in 2015"
  label var nb_active2010 "imputed in 2010"
  label var EmployeesNumber "45 degree line"
  
  preserve 
  forvalues i = 2000(1)2021{
  replace nb_active`i' = log(nb_active`i')
replace nb_quit`i' = log(nb_quit`i')
}

  replace EmployeesNumber = log(EmployeesNumber)
  
  twoway (scatter nb_active2020 EmployeesNumber)  /// 
  (line EmployeesNumber EmployeesNumber), subtitle(2020) /// 
  ytitle("log(Imputed officer count)") xtitle("log(Official officer count)") legend(off) name("act2O20",replace)


    twoway  (scatter nb_active2015 EmployeesNumber) /// 
  (line EmployeesNumber EmployeesNumber), subtitle(2015) /// 
    ytitle("log(Imputed officer count)") xtitle("log(Official officer count)") legend(off) name("act2O15",replace)
  
    twoway (scatter nb_active2010 EmployeesNumber) /// 
  (line EmployeesNumber EmployeesNumber), subtitle(2010) /// 
    ytitle("log(Imputed officer count)") xtitle("log(Official officer count)") legend(off) name("act2O10",replace)
  
    twoway (scatter nb_active2005 EmployeesNumber) /// 
  (line EmployeesNumber EmployeesNumber), subtitle(2005) /// 
  ytitle("log(Imputed officer count)") xtitle("log(Official officer count)") legend(off) name("act2O05",replace)
  
  
  graph combine act2O20 act2O15 act2O10 act2O05, graphregion(color(white)) 

 
   graph export "FIG_A1_imputed_actual_size.png", replace
   
   
   restore 
   
**************************************
** Fig 4: Size of agency & turnover **
**************************************

 use "$data/IL_pensions_anon", clear

 sort ptbid start_date

gen voluntary_sep = sepreason=="Resigned" | sepreason=="Left for new job" | sepreason=="Retired" 
gen involuntary_sep = sepreason=="Terminated"  | sepreason=="Charged/Convicted of Crime" 
gen other_sep = sep_date!=. & involuntary_sep==0 & voluntary_sep == 0
** Drop stints of 0 or 1 day 
gen length = sep_date -start_date 
drop if length < 2

** Drop new stint at the same agency. 
gen x = ptbid ==ptbid[_n-1] & agency == agency[_n-1]
drop if ptbid ==ptbid[_n-1] & agency == agency[_n-1]

forvalues i = 2000(1)2021{
gen working_`i' =Y_start <= `i' & (Y_stop >= `i' | Y_stop == .)
gen quit_`i' = Y_stop == `i' & voluntary_sep == 1
by agency, sort: egen nb_active`i' = sum(working_`i') 
by agency, sort: egen nb_quit`i' = sum(quit_`i') 
}

keep agency nb_active* nb_quit* Population EmployeesNumber  
duplicates drop 

  
reshape long nb_active nb_quit, i(agency) j(year)
gen quit_rate = nb_quit/EmployeesNumber

 xtile size = EmployeesNumber  , nq(10)
 by size , sort: egen m = mean(EmployeesNumber ) 
 label var m "Number of employees"
 by size , sort: egen q = mean(quit_rate) 
 label var q "Separation rate"

 twoway (line q size if year == 2018,  c(l) yaxis(1) yscale(range(0 0.12) axis(1)) ylabel(0(0.05)0.12, notick axis(1))) /// 
 (bar m size if year == 2018, c(l) yaxis(2) fcolor(none) lcolor(black%85)  yscale(range(0 100) axis(2))),  xtitle(Decile of agency size) /// 
 legend(ring(1) position(6)  bplacement(south))  plotregion(margin(zero)) xlabel(1(1)10, notick) 
 graph export "FIG4_Agency_size_quit_2018.png", replace

******************************** 
** Fig 5: Turnover and salary **
******************************** 


 use "$data/IL_pensions_anon", clear

 keep if EmployeesNumber !=. 

 gen vol_leave = sepreason =="Left for new job" | sepreason =="Resigned" 
 

gen MedianSalary = payroll/EmployeesNumber*12

 
sort ptbid start_date
gen sal_change = MedianSalary[_n+1] - MedianSalary if ptbid ==ptbid[_n+1] & vol_leave==1
replace sal_change = . if  ptbid ==ptbid[_n+1] & agency==agency[_n+1]


sum sal_change , detail
local med= round(r(p50)) 

sum MedianSalary if ptbid ==ptbid[_n+1]  & agency!=agency[_n+1] & vol_leave==1, detail
local leave= round(r(p50)) 

sum MedianSalary if ptbid !=ptbid[_n+1]  | (ptbid ==ptbid[_n+1] & agency==agency[_n+1]), detail
local stay= round(r(p50)) 

gen leave = ptbid == ptbid[_n+1] & agency!=agency[_n+1]
gen higher = leave == 1 & MedianSalary[_n+1] > MedianSalary

sum higher if leave == 1 & vol_leave==1
local high=   round(100*r(mean))  

hist sal_change  , frac xtitle(Distribution of median salary changes) ytitle("Fraction") plotregion(margin(zero)) /// 
 xlabel(, format( %15.0fc) labsize(vsmall))
graph export "FIG3_salary_turnover.png", replace


 
************************************* 
** Fig 5: Turnover and agency size **
*************************************


 use "$data/IL_pensions_anon", clear

 keep if EmployeesNumber !=. 

 gen vol_leave = sepreason =="Left for new job" | sepreason =="Resigned" 
  
sort ptbid start_date
gen sal_change = EmployeesNumber[_n+1] - EmployeesNumber if ptbid ==ptbid[_n+1] & vol_leave==1
replace sal_change = . if  ptbid ==ptbid[_n+1] & agency==agency[_n+1]


sum sal_change , detail
local med= round(r(p50)) 

sum EmployeesNumber if ptbid ==ptbid[_n+1]  & agency!=agency[_n+1] & vol_leave==1, detail
local leave= round(r(p50)) 

sum EmployeesNumber if ptbid !=ptbid[_n+1]  | (ptbid ==ptbid[_n+1] & agency==agency[_n+1]), detail
local stay= round(r(p50)) 

gen leave = ptbid == ptbid[_n+1] & agency!=agency[_n+1]
gen higher = leave == 1 & EmployeesNumber[_n+1] > EmployeesNumber

sum higher if leave == 1 & vol_leave==1
local high=   round(100*r(mean))  

replace sal_change = 400 if sal_change >400 & sal_change !=. 
replace sal_change = -400 if sal_change <-400 & sal_change !=. 


hist sal_change , frac xtitle(Distribution of changes in number of officers) ytitle("Fraction") plotregion(margin(zero))

 graph export "FIG5_size_turnover.png", replace


